import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Base address of the news site being scraped.
url = "http://news.scu.edu.cn/"

# Filled from the listing page: one entry per article link, index-aligned.
subUrl = []      # address of each article sub-page
title = []       # link text of each article anchor

# Filled while visiting the sub-pages: one entry per visited page.
information = []  # text of the page's info <div>
passage = []      # text of the page's content <div>
pagePicNum = []   # number of images found on the page

# Image addresses gathered across all sub-pages, in visiting order.
imgUrl = []

# Counters; not referenced by the visible part of the script
# (presumably used further down -- TODO confirm).
picNum = 0
num = 0

# Download the listing page and record, for every anchor with class
# ``c195338``, its absolute target address (``subUrl``) and its link text
# (``title``).  Both lists are appended to together, so they stay
# index-aligned.
# A timeout keeps an unresponsive server from hanging the script forever.
response = requests.get("http://news.scu.edu.cn/ztxw/shfw.htm", timeout=10)
# Without the listing there is nothing to scrape, so fail loudly on an HTTP
# error instead of silently parsing an error page into empty results.
response.raise_for_status()
# Decode with the encoding detected from the body instead of the header default.
response.encoding = response.apparent_encoding
soup = BeautifulSoup(response.text, "html.parser")
getpageurl = soup.find_all('a', class_='c195338')
for link in getpageurl:
    # Read the attribute directly rather than regex-matching the tag's
    # serialised form: no dependence on a fixed "../" prefix, and no
    # truncation of ".html" or query strings.
    href = link.get('href')
    if not href:
        # An anchor without a target cannot be visited; skip both appends so
        # subUrl and title remain aligned.
        continue
    # Resolve relative links ("../info/...") against the page they came from.
    subUrl.append(urljoin(response.url, href))
    title.append(link.get_text())
# Visit every collected sub-page and record, per page:
#   information -- text of the styled info <div>        ("" if absent)
#   passage     -- text of the ``v_news_content`` <div>  ("" if absent)
#   pagePicNum  -- how many image addresses the page contributed to imgUrl
# Exactly one entry is appended to each of these lists per page, so they stay
# index-aligned with ``subUrl`` even when a page lacks the expected markup.
for page_url in subUrl:
    # A timeout keeps one unresponsive page from stalling the whole run.
    response = requests.get(page_url, timeout=10)
    # Decode with the encoding detected from the body.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    getinfo1 = soup.find('div', style='color:#666666;font-size:14px;line-height:30px;border-top:1px solid #e3e3e3;border-bottom:1px solid #e3e3e3;margin-bottom:10px;')
    # find() returns None when nothing matches; store a placeholder instead
    # of crashing on .get_text().
    information.append(getinfo1.get_text() if getinfo1 is not None else "")

    getpassage = soup.find('div', class_='v_news_content')
    if getpassage is None:
        passage.append("")
        pagePicNum.append(0)
        continue
    passage.append(getpassage.get_text())

    imgArry = getpassage.find_all('img', class_='img_vsb_content')
    collected = 0
    for img in imgArry:
        # Read the attribute itself instead of regex-matching the serialised
        # tag, which depended on attribute order and left quote characters in
        # the result.  Fall back to ``src`` when ``orisrc`` is missing.
        source = img.get('orisrc') or img.get('src')
        if not source:
            continue
        # Resolve against the page address so root-relative and relative
        # paths both become absolute without a doubled slash.
        imgUrl.append(urljoin(response.url, source))
        collected += 1
    # Count only addresses actually stored, so pagePicNum can be used to
    # split imgUrl back into per-page groups.
    pagePicNum.append(collected)
